package rddSummary.transition.value_type

import org.apache.spark.{SparkConf, SparkContext}

/**
 * Small demo of the `distinct` transformation on an RDD of integers.
 *
 * Creates a SparkContext with master `local`, builds an RDD from a list that contains
 * repeated values, removes the duplicates and prints every remaining element to stdout.
 */
object test_distinct {
  /**
   * Program entry point.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {

    val conf = new SparkConf().setAppName("test").setMaster("local")
    val context = new SparkContext(conf)

    // Run the job inside try/finally so the context is stopped even when the job throws;
    // otherwise a failure would leave the SparkContext running.
    try {
      // The second argument (1) is the number of slices requested for the RDD.
      val numbers = context.makeRDD(List(1, 2, 3, 4, 1, 2, 3), 1)

      // Remove duplicate elements from the dataset.
      val unique = numbers.distinct()

      unique.collect().foreach(println)
    } finally {
      context.stop()
    }
  }

}
